Introduction
This simulation models a plant population with multiple life history
traits using AlphaSimR. The model includes size-dependent reproductive
traits and exponential decay in QTL effect sizes to simulate realistic
genetic architecture.
Load Required Libraries
library(AlphaSimR)
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(ggdoctheme)
library(dplyr)
library(patchwork)
library(gridExtra)
library(knitr)
library(rstatix)
# Set seed for reproducibility
set.seed(123)
Simulation Parameters
# Population parameters
n_individuals = 1000
n_loci = 100
n_traits = 6
# Trait names
trait_names = c("Germination", "Initial_Size", "Establishment",
"Growth_Rate", "Flowering_Prob", "Fruit_Per_Plant")
trait_means = c(0.5, 2, 0.6, 1.2, 0.3, 2) # Mean values for traits
# Heritabilities for each trait
h2 <- c(0.6, 0.5, 0.7, 0.4, 0.3, 0.4)
names(h2) <- trait_names
print("Simulation Parameters:")
## [1] "Simulation Parameters:"
print(paste("Number of individuals:", n_individuals))
## [1] "Number of individuals: 1000"
print(paste("Number of loci per trait:", n_loci))
## [1] "Number of loci per trait: 100"
print(paste("Number of traits:", n_traits))
## [1] "Number of traits: 6"
print("Heritabilities:")
## [1] "Heritabilities:"
print(h2)
## Germination Initial_Size Establishment Growth_Rate Flowering_Prob
## 0.6 0.5 0.7 0.4 0.3
## Fruit_Per_Plant
## 0.4
Genetic Architecture Setup
# Create founder genomes
founderPop = runMacs(nInd = 20, # Small founder population
nChr = 10, # 10 chromosomes
segSites = n_loci * n_traits / 10, # Distribute loci across chromosomes
species = "GENERIC")
# Set up simulation parameters
SP = SimParam$new(founderPop)
# Add traits to simulation parameters
SP$addTraitA(nQtlPerChr = n_loci/10, # Distribute across chromosomes
mean = trait_means,
var = c(1,1,1,1,1,1),
gamma = TRUE,
shape = 1,
name = trait_names)
# Set heritabilities
SP$setVarE(h2 = h2)
print("Genetic architecture established with exponential decay in effect sizes")
## [1] "Genetic architecture established with exponential decay in effect sizes"
Generate F2 Population
# Create initial parents from founders
parents = newPop(founderPop, simParam = SP)
# Create F1 by random mating
F1 = randCross(parents, nCrosses = 500, simParam = SP)
# Create F2 population
F2 = randCross(F1, nCrosses = n_individuals, simParam = SP)
print(paste("Generated F2 population with", F2@nInd, "individuals"))
## [1] "Generated F2 population with 1000 individuals"
Compare Genetic Values vs Phenotypes in F2
# Create comparison data frame for F2
comparison_data = data.frame(
Individual = rep(1:nrow(gv_F2), n_traits),
Trait = rep(trait_names, each = nrow(gv_F2)),
Genetic_Value = as.vector(gv_F2),
Phenotype = as.vector(pheno_F2)
)
# Calculate correlations between genetic values and phenotypes for each trait
correlations = sapply(1:n_traits, function(i) {
cor(gv_F2[,i], pheno_F2[,i])
})
names(correlations) = trait_names
# Create summary table
correlation_summary = data.frame(
Trait = trait_names,
Heritability = h2,
GV_Pheno_Correlation = correlations,
Expected_Correlation = sqrt(h2) # Theoretical expectation
)
kable(correlation_summary, digits = 3,
caption = "Genetic Value vs Phenotype Correlations")
Genetic Value vs Phenotype Correlations
| Germination |
Germination |
0.6 |
0.788 |
0.775 |
| Initial_Size |
Initial_Size |
0.5 |
0.652 |
0.707 |
| Establishment |
Establishment |
0.7 |
0.871 |
0.837 |
| Growth_Rate |
Growth_Rate |
0.4 |
0.603 |
0.632 |
| Flowering_Prob |
Flowering_Prob |
0.3 |
0.538 |
0.548 |
| Fruit_Per_Plant |
Fruit_Per_Plant |
0.4 |
0.626 |
0.632 |
print("Correlations between genetic values and phenotypes:")
## [1] "Correlations between genetic values and phenotypes:"
print(round(correlations, 3))
## Germination Initial_Size Establishment Growth_Rate Flowering_Prob
## 0.788 0.652 0.871 0.603 0.538
## Fruit_Per_Plant
## 0.626
Visualization: Genetic Values vs Phenotypes
# Create scatter plots for each trait
plot_list = list()
for(i in 1:n_traits) {
trait_data_all = data.frame(
GV = gv_F2[,i],
Phenotype = pheno_F2[,i]
)
p = ggplot(trait_data_all, aes(x = GV, y = Phenotype)) +
geom_point(alpha = 0.6, color = "blue") +
geom_smooth(method = "lm", color = "red", se = TRUE) +
labs(title = paste(trait_names[i]),
subtitle = paste("r =", round(correlations[i], 3),
"| h² =", h2[i]),
x = "Genetic Value (Breeding Value)",
y = "Phenotype") +
theme_minimal() +
theme(plot.title = element_text(size = 12, hjust = 0.5))
plot_list[[i]] = p
}
# Arrange plots
do.call(grid.arrange, c(plot_list, ncol = 3))

Phenotype Distributions Across Generations
# Prepare data for plotting distributions across generations
# Fix the trait assignment
dist_data <- data.frame(
Generation = c(rep("Parents", nrow(pheno_parents) * n_traits),
rep("F1", nrow(pheno_F1) * n_traits),
rep("F2", nrow(pheno_F2) * n_traits)),
Trait = c(rep(trait_names, nrow(pheno_parents)),
rep(trait_names, nrow(pheno_F1)),
rep(trait_names, nrow(pheno_F2))),
Phenotype = c(as.vector(t(pheno_parents)),
as.vector(t(pheno_F1)),
as.vector(t(pheno_F2)))
)
# Set generation as factor with proper order
dist_data$Generation <- factor(dist_data$Generation,
levels = c("Parents", "F1", "F2"))
print("Distribution data prepared")
## [1] "Distribution data prepared"
print("Summary of phenotype distributions:")
## [1] "Summary of phenotype distributions:"
summary_stats <- dist_data %>%
group_by(Generation, Trait) %>%
summarise(
Mean = mean(Phenotype),
SD = sd(Phenotype),
Min = min(Phenotype),
Max = max(Phenotype),
.groups = "drop"
)
kable(summary_stats, digits = 3, caption = "Phenotype Summary Statistics by Generation")
Phenotype Summary Statistics by Generation
| Parents |
Establishment |
0.632 |
1.319 |
-2.018 |
3.181 |
| Parents |
Flowering_Prob |
-0.233 |
1.939 |
-4.421 |
3.849 |
| Parents |
Fruit_Per_Plant |
2.297 |
1.909 |
-0.394 |
6.326 |
| Parents |
Germination |
0.251 |
1.181 |
-1.680 |
2.728 |
| Parents |
Growth_Rate |
1.204 |
1.983 |
-2.652 |
4.859 |
| Parents |
Initial_Size |
1.562 |
1.343 |
-1.121 |
3.641 |
| F1 |
Establishment |
0.536 |
1.240 |
-2.936 |
3.991 |
| F1 |
Flowering_Prob |
0.327 |
1.839 |
-5.599 |
6.471 |
| F1 |
Fruit_Per_Plant |
1.960 |
1.668 |
-2.447 |
6.666 |
| F1 |
Germination |
0.517 |
1.282 |
-3.443 |
4.043 |
| F1 |
Growth_Rate |
1.268 |
1.489 |
-3.294 |
5.529 |
| F1 |
Initial_Size |
1.957 |
1.357 |
-1.756 |
6.609 |
| F2 |
Establishment |
0.532 |
1.283 |
-3.646 |
4.905 |
| F2 |
Flowering_Prob |
0.281 |
1.776 |
-7.082 |
6.246 |
| F2 |
Fruit_Per_Plant |
1.982 |
1.633 |
-3.838 |
7.482 |
| F2 |
Germination |
0.560 |
1.321 |
-3.921 |
4.926 |
| F2 |
Growth_Rate |
1.206 |
1.591 |
-4.413 |
6.025 |
| F2 |
Initial_Size |
1.981 |
1.343 |
-2.812 |
7.391 |
Density Plots for Better Comparison
# Create density plots for better comparison across generations
density_plots = list()
for(i in 1:n_traits) {
trait_subset = dist_data[dist_data$Trait == trait_names[i], ]
p = ggplot(trait_subset, aes(x = Phenotype, color = Generation, fill = Generation)) +
geom_density(alpha = 0.3, size = 1) +
scale_color_manual(values = c("Parents" = "red", "F1" = "green", "F2" = "blue")) +
scale_fill_manual(values = c("Parents" = "red", "F1" = "green", "F2" = "blue")) +
labs(title = paste("Phenotype Density:", trait_names[i]),
x = "Phenotype Value",
y = "Density") +
theme_minimal() +
theme(legend.position = "bottom")
density_plots[[i]] = p
}
# Arrange density plots
do.call(grid.arrange, c(density_plots, ncol = 3))

Variance Components Analysis
# Calculate variance components for each trait in each generation
variance_analysis <- data.frame(
Trait = rep(trait_names, 3),
Generation = rep(c("Parents", "F1", "F2"), each = n_traits),
Phenotypic_Variance = c(apply(pheno_parents, 2, var),
apply(pheno_F1, 2, var),
apply(pheno_F2, 2, var)),
Genetic_Variance = c(apply(gv_parents, 2, var),
apply(gv_F1, 2, var),
apply(gv_F2, 2, var))
)
# Calculate environmental variance
variance_analysis$Environmental_Variance <- variance_analysis$Phenotypic_Variance -
variance_analysis$Genetic_Variance
# Calculate realized heritability
variance_analysis$Realized_Heritability <- variance_analysis$Genetic_Variance /
variance_analysis$Phenotypic_Variance
kable(variance_analysis, digits = 3,
caption = "Variance Components Analysis by Generation")
Variance Components Analysis by Generation
| Germination |
Parents |
1.394 |
1.053 |
0.342 |
0.755 |
| Initial_Size |
Parents |
1.805 |
1.053 |
0.752 |
0.583 |
| Establishment |
Parents |
1.741 |
1.053 |
0.688 |
0.605 |
| Growth_Rate |
Parents |
3.934 |
1.053 |
2.881 |
0.268 |
| Flowering_Prob |
Parents |
3.761 |
1.053 |
2.708 |
0.280 |
| Fruit_Per_Plant |
Parents |
3.646 |
1.053 |
2.593 |
0.289 |
| Germination |
F1 |
1.644 |
0.998 |
0.646 |
0.607 |
| Initial_Size |
F1 |
1.842 |
0.820 |
1.022 |
0.445 |
| Establishment |
F1 |
1.537 |
1.116 |
0.421 |
0.726 |
| Growth_Rate |
F1 |
2.217 |
0.819 |
1.398 |
0.369 |
| Flowering_Prob |
F1 |
3.383 |
0.941 |
2.442 |
0.278 |
| Fruit_Per_Plant |
F1 |
2.782 |
1.136 |
1.646 |
0.408 |
| Germination |
F2 |
1.745 |
1.016 |
0.730 |
0.582 |
| Initial_Size |
F2 |
1.803 |
0.721 |
1.082 |
0.400 |
| Establishment |
F2 |
1.645 |
1.175 |
0.470 |
0.714 |
| Growth_Rate |
F2 |
2.531 |
0.812 |
1.719 |
0.321 |
| Flowering_Prob |
F2 |
3.153 |
1.001 |
2.152 |
0.317 |
| Fruit_Per_Plant |
F2 |
2.667 |
1.119 |
1.549 |
0.419 |
# Plot variance components
variance_long = variance_analysis %>%
select(Trait, Generation, Genetic_Variance, Environmental_Variance) %>%
pivot_longer(cols = c("Genetic_Variance", "Environmental_Variance"),
names_to = "Variance_Component",
values_to = "Variance")
ggplot(variance_long, aes(x = Generation, y = Variance, fill = Variance_Component)) +
geom_bar(stat = "identity", position = "stack") +
facet_wrap(~Trait, scales = "free_y") +
scale_fill_manual(values = c("Genetic_Variance" = "lightblue",
"Environmental_Variance" = "lightcoral")) +
labs(title = "Variance Components Across Generations",
x = "Generation",
y = "Variance",
fill = "Variance Component") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

Heritability Validation
# Compare expected vs realized heritabilities
heritability_comparison <- data.frame(
Trait = trait_names,
Expected_h2 = h2,
Realized_h2_Parents = variance_analysis$Realized_Heritability[1:n_traits],
Realized_h2_F1 = variance_analysis$Realized_Heritability[(n_traits+1):(2*n_traits)],
Realized_h2_F2 = variance_analysis$Realized_Heritability[(2*n_traits+1):(3*n_traits)]
)
kable(heritability_comparison, digits = 3,
caption = "Expected vs Realized Heritabilities")
Expected vs Realized Heritabilities
| Germination |
Germination |
0.6 |
0.755 |
0.607 |
0.582 |
| Initial_Size |
Initial_Size |
0.5 |
0.583 |
0.445 |
0.400 |
| Establishment |
Establishment |
0.7 |
0.605 |
0.726 |
0.714 |
| Growth_Rate |
Growth_Rate |
0.4 |
0.268 |
0.369 |
0.321 |
| Flowering_Prob |
Flowering_Prob |
0.3 |
0.280 |
0.278 |
0.317 |
| Fruit_Per_Plant |
Fruit_Per_Plant |
0.4 |
0.289 |
0.408 |
0.419 |
# Plot heritability comparison
herit_long <- heritability_comparison |>
pivot_longer(cols = -Trait, names_to = "Heritability_Type", values_to = "Heritability")
ggplot(herit_long, aes(x = Trait, y = Heritability, fill = Heritability_Type)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = c("Expected_h2" = "gold",
"Realized_h2_Parents" = "red",
"Realized_h2_F1" = "green",
"Realized_h2_F2" = "blue")) +
labs(title = "Expected vs Realized Heritabilities",
x = "Trait",
y = "Heritability",
fill = "Type") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

Size-Dependent Reproductive Traits
# Calculate final size based on initial size and growth rate
trait_data$Final_Size = trait_data$Initial_Size * trait_data$Growth_Rate
# Size-dependent flowering probability
# Larger plants have higher flowering probability
size_effect_flowering = (trait_data$Final_Size - min(trait_data$Final_Size)) /
(max(trait_data$Final_Size) - min(trait_data$Final_Size))
trait_data$Flowering_Prob = trait_data$Flowering_Prob_base * (0.5 + 0.5 * size_effect_flowering)
trait_data$Flowering_Prob = pmin(trait_data$Flowering_Prob, 0.95) # Cap at 95%
# Size-dependent fruit production
# Only flowering plants produce fruit, and larger plants produce more
trait_data$Flowers = rbinom(nrow(trait_data), 1, trait_data$Flowering_Prob)
size_effect_fruit = (trait_data$Final_Size - min(trait_data$Final_Size)) /
(max(trait_data$Final_Size) - min(trait_data$Final_Size))
trait_data$Fruit_Per_Plant = trait_data$Flowers *
(trait_data$Fruit_Per_Plant_base * (0.3 + 0.7 * size_effect_fruit))
print("Size-dependent traits calculated")
## [1] "Size-dependent traits calculated"
print(paste("Proportion flowering:", round(mean(trait_data$Flowers), 3)))
## [1] "Proportion flowering: 0.32"
print(paste("Mean fruits per flowering plant:",
round(mean(trait_data$Fruit_Per_Plant[trait_data$Flowers == 1]), 2)))
## [1] "Mean fruits per flowering plant: 8.42"
Visualization
# Create summary statistics
summary_stats = trait_data |>
get_summary_stats(type = "common")
# Print summary
kable(t(summary_stats), digits = 3, caption = "Trait Summary Statistics")
Trait Summary Statistics
| variable |
Germination |
Initial_Size |
Establishment |
Growth_Rate |
Flowering_Prob |
Fruit_Per_Plant |
Flowering_Prob_base |
Fruit_Per_Plant_base |
Individual |
Final_Size |
Flowers |
| n |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
| min |
0.133 |
6.409 |
0.258 |
2.139 |
0.167 |
0.000 |
0.314 |
4.475 |
1.000 |
17.774 |
0.000 |
| max |
0.841 |
31.038 |
0.827 |
5.930 |
0.561 |
34.240 |
0.732 |
99.989 |
1000.000 |
161.526 |
1.000 |
| median |
0.571 |
13.387 |
0.559 |
3.442 |
0.313 |
0.000 |
0.523 |
16.154 |
500.500 |
46.825 |
0.000 |
| iqr |
0.158 |
4.429 |
0.148 |
0.858 |
0.078 |
5.036 |
0.106 |
8.734 |
499.500 |
21.944 |
1.000 |
| mean |
0.564 |
13.903 |
0.553 |
3.545 |
0.320 |
2.693 |
0.522 |
17.766 |
500.500 |
49.915 |
0.320 |
| sd |
0.117 |
3.676 |
0.103 |
0.649 |
0.058 |
4.685 |
0.073 |
8.093 |
288.819 |
18.299 |
0.467 |
| se |
0.004 |
0.116 |
0.003 |
0.021 |
0.002 |
0.148 |
0.002 |
0.256 |
9.133 |
0.579 |
0.015 |
| ci |
0.007 |
0.228 |
0.006 |
0.040 |
0.004 |
0.291 |
0.005 |
0.502 |
17.923 |
1.136 |
0.029 |
# Plot 1: Distribution of key traits
p1 = ggplot(trait_data, aes(x = Germination)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "skyblue") +
labs(title = "Germination Rate Distribution", x = "Germination Probability", y = "Count") +
theme_doc()
p2 = ggplot(trait_data, aes(x = Final_Size)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "lightgreen") +
labs(title = "Final Size Distribution", x = "Final Size (mm)", y = "Count") +
theme_doc()
p3 = ggplot(trait_data, aes(x = Flowering_Prob)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "orange") +
labs(title = "Flowering Probability Distribution", x = "Flowering Probability", y = "Count") +
theme_doc()
p4 = ggplot(trait_data, aes(x = Fruit_Per_Plant)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "pink") +
labs(title = "Fruit Production Distribution", x = "Fruits per Plant", y = "Count") +
theme_doc()
(p1 + p2)/( p3 + p4)

# Plot 2: Size-dependent relationships
p5 = ggplot(trait_data, aes(x = Final_Size, y = Flowering_Prob)) +
geom_point(alpha = 0.6, color = "blue") +
geom_smooth(method = "loess", color = "red") +
labs(title = "Size vs Flowering Probability",
x = "Final Size (mm)", y = "Flowering Probability") +
theme_doc()
p6 = ggplot(trait_data[trait_data$Flowers == 1, ],
aes(x = Final_Size, y = Fruit_Per_Plant)) +
geom_point(alpha = 0.6, color = "darkgreen") +
geom_smooth(method = "loess", color = "red") +
labs(title = "Size vs Fruit Production (Flowering Plants Only)",
x = "Final Size (mm)", y = "Fruits per Plant") +
theme_doc()
p5 + p6

# Plot 3: Trait correlations
cor_traits = trait_data |>
select(Germination, Initial_Size, Establishment, Growth_Rate,
Final_Size, Flowering_Prob, Fruit_Per_Plant) |>
cor()
# Create correlation heatmap
cor_df = as.data.frame(as.table(cor_traits))
names(cor_df) = c("Trait1", "Trait2", "Correlation")
ggplot(cor_df, aes(Trait1, Trait2, fill = Correlation)) +
geom_tile() +
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1)) +
theme_doc() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Trait Correlation Matrix")

Fitness and Selection Analysis
# Calculate composite fitness measure
# Fitness = Germination × Establishment × (Fruit_Per_Plant + 1)
trait_data$Fitness = trait_data$Germination *
trait_data$Establishment *
(trait_data$Fruit_Per_Plant + 1)
# Standardize fitness
trait_data$Fitness_Std = scale(trait_data$Fitness)[,1]
# Selection analysis
selection_summary = trait_data |>
summarise(
Mean_Fitness = mean(Fitness),
Var_Fitness = var(Fitness),
CV_Fitness = sd(Fitness)/mean(Fitness),
Prop_Reproductive = mean(Flowers),
Mean_Reproductive_Success = mean(Fruit_Per_Plant[Flowers == 1])
)
kable(selection_summary, digits = 3, caption = "Population Fitness Summary")
Population Fitness Summary
| 1.147 |
2.202 |
1.293 |
0.32 |
8.416 |
# Plot fitness distribution
ggplot(trait_data, aes(x = Fitness)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "purple") +
geom_vline(xintercept = mean(trait_data$Fitness), color = "red", linetype = "dashed") +
labs(title = "Fitness Distribution",
subtitle = paste("Mean fitness =", round(mean(trait_data$Fitness), 3)),
x = "Composite Fitness", y = "Count") +
theme_doc()

Save Results
# Save the population data
#write.csv(trait_data, "data/F2_population_traits.csv", row.names = FALSE)
# Save genetic values matrix
#write.csv(gv_matrix, "data/F2_genetic_values.csv", row.names = FALSE)
print("Results saved to CSV files:")
## [1] "Results saved to CSV files:"
print("- F2_population_traits.csv: All trait values and derived measures")
## [1] "- F2_population_traits.csv: All trait values and derived measures"
print("- F2_genetic_values.csv: Raw genetic values from AlphaSimR")
## [1] "- F2_genetic_values.csv: Raw genetic values from AlphaSimR"
Summary
In this simulation we successfully generated an F2 population of 1000
individuals with the following characteristics:
Genetic Architecture: Each trait controlled by
100 QTL with exponentially decaying effect sizes, creating realistic
genetic architecture where few loci have large effects.
Life History Traits:
- Germination probability (0-1)
- Initial and final size (cm)
- Establishment probability (0-1)
- Growth rate multiplier
- Size-dependent flowering probability
- Size-dependent fruit production
Key assumptions:
- Larger plants have higher flowering probability and fruit
production
- Only flowering plants produce fruit
- Composite fitness incorporates multiple life stages